We first zoom in on the sewing department, since it has more data available and contains no missing values. We want to explore the data from the following perspectives:
How is productivity affected by each variable? (simply looking at the time series plots)
Is there a team (or several teams) that outperformed the rest? What sets them apart?
Is it possible that teams collaborate?
What does the workflow look like?
# Load the garment-worker productivity data and derive the per-department and
# per-team subsets used by the rest of the analysis.
#
# Objects created:
#   data_org  - raw table with only the department labels cleaned (text kept)
#   data      - analysis copy: department, day and quarter recoded to integer
#               codes, date parsed as Date
#   sewing, finishing - rows of `data` for department code 1 and 2
#   sewingTeam01..sewingTeam12, finishingTeam01..finishingTeam12 - rows of each
#               department for a single team number
data_path <- "/Users/karawei/Desktop/GRAD 3rd/ESE 527/garments_worker_productivity.csv"
#data_path <- "C:/Users/ThinkPad/Documents/GitHub/OK/worker_productivity.csv"

# Read the file once; both working copies start from the same raw table.
# stringsAsFactors = FALSE keeps the label columns as plain text on every R
# version, which the label clean-up below relies on.
data_org <- read.csv(data_path, stringsAsFactors = FALSE)

# Clean the department labels: drop stray whitespace ("finishing ") and fix the
# "sweing" misspelling. %in% is used so a missing label never breaks the mask.
data_org$department <- trimws(data_org$department)
data_org$department[data_org$department %in% "sweing"] <- "sewing"

data <- data_org

# Recode department to an integer: sewing = 1, finishing = 2.
# Assigning as.integer(1) element-wise into a character column would be
# coerced back to the string "1", so the whole column is replaced instead.
# Labels outside the lookup become NA.
department_codes <- c(sewing = 1L, finishing = 2L)
data$department <- unname(department_codes[data$department])

# change day of the week to number Monday=1, Sunday=7
day_names <- c(
  "Monday", "Tuesday", "Wednesday", "Thursday",
  "Friday", "Saturday", "Sunday"
)
data$day <- match(data$day, day_names)

# changing the date to date format
data$date <- as.Date(data$date, format = "%m/%d/%Y")

# Changing Quarter to numbers ("Quarter1" .. "Quarter5" -> 1 .. 5)
data$quarter <- match(data$quarter, paste0("Quarter", 1:5))

# One data frame per department (rows with a missing code are excluded).
sewing <- data[data$department %in% 1L, ]
finishing <- data[data$department %in% 2L, ]

# Split a department's rows by team number and name the pieces
# "<prefix>01" .. "<prefix>12". Fixing the factor levels guarantees that all
# twelve data frames exist, even for a team without rows.
team_ids <- 1:12
split_by_team <- function(department_data, prefix) {
  pieces <- split(
    department_data,
    factor(department_data$team, levels = team_ids)
  )
  names(pieces) <- sprintf("%s%02d", prefix, team_ids)
  pieces
}

# Publish the per-team data frames as individual variables because later code
# refers to them by name (sewingTeam01, finishingTeam07, ...).
invisible(list2env(split_by_team(sewing, "sewingTeam"), envir = environment()))
invisible(list2env(split_by_team(finishing, "finishingTeam"), envir = environment()))

head(data_org)
## date quarter department day team targeted_productivity smv wip
## 1 1/1/2015 Quarter1 sewing Thursday 8 0.80 26.16 1108
## 2 1/1/2015 Quarter1 finishing Thursday 1 0.75 3.94 NA
## 3 1/1/2015 Quarter1 sewing Thursday 11 0.80 11.41 968
## 4 1/1/2015 Quarter1 sewing Thursday 12 0.80 11.41 968
## 5 1/1/2015 Quarter1 sewing Thursday 6 0.80 25.90 1170
## 6 1/1/2015 Quarter1 sewing Thursday 7 0.80 25.90 984
## over_time incentive idle_time idle_men no_of_style_change no_of_workers
## 1 7080 98 0 0 0 59.0
## 2 960 0 0 0 0 8.0
## 3 3660 50 0 0 0 30.5
## 4 3660 50 0 0 0 30.5
## 5 1920 50 0 0 0 56.0
## 6 6720 38 0 0 0 56.0
## actual_productivity
## 1 0.9407254
## 2 0.8865000
## 3 0.8005705
## 4 0.8005705
## 5 0.8003819
## 6 0.8001250
We first convert all the categorical variables into numeric codes. Because the dataset only has wip values for the sewing department, we start to consider whether we should split the dataset into two subsets - the finishing department and the sewing department.
# par(mfrow = c(3, 4))
# hist(data[["actual_productivity"]],main="Productivity Index",xlab="Productivity Index")
# hist(data[["targeted_productivity"]],main="Target Productivity",xlab="Productivity Index")
# hist(data[["smv"]],main="Standard Minute Value",xlab="Minutes Needed for Task")
# hist(data[["over_time"]],main="Over Time",xlab="Minutes of Over Time")
# hist(data[["wip"]],main="Work in Progress",xlab="No. of Unfinished Products")
# hist(data[["incentive"]],main="Financial Incentive",xlab="Bangladash Taka(BDT)",breaks=20)
# hist(data[["idle_time"]],main="Idle Time",xlab="Interrupted Time")
# hist(data[["idle_men"]],main="Idle Workers",xlab="No. of Idle Workers When Production was Interrupted")
# hist(data[["no_of_style_change"]],main="No. of Style Change",xlab="Number of Style Change")
# hist(data[["no_of_workers"]],main="No. of Workers/Team",xlab="Number of Workers/Team")
# Histograms of every numeric variable, drawn on one 3 x 4 grid of panels.
# Names are columns of `data`; values are the x-axis labels.
histogram_labels <- c(
  actual_productivity = "Actual Productivity",
  targeted_productivity = "Targeted Productivity",
  smv = "Minutes Needed for Task",
  over_time = "Minutes of Over Time",
  wip = "No. of Unfinished Products",
  incentive = "Financial Incentive",
  idle_time = "Interrupted Time",
  idle_men = "Interrupted Workers",
  no_of_style_change = "Number of Style Change",
  no_of_workers = "Number of Workers/Team"
)

# Reserve an outer top margin for the shared title and remember the previous
# graphics settings so they can be restored afterwards.
old_par <- par(mfrow = c(3, 4), oma = c(0, 0, 3, 0))
for (variable in names(histogram_labels)) {
  hist(
    data[[variable]],
    # incentive gets a finer binning; "Sturges" is hist()'s default rule
    breaks = if (variable == "incentive") 20 else "Sturges",
    xlab = histogram_labels[[variable]],
    main = ""
  )
}
# outer = TRUE writes the title into the outer margin of the whole grid rather
# than at a fixed line offset from the most recently drawn panel.
mtext("Histogram Subplots", side = 3, outer = TRUE, line = 1, cex = 1.5)
par(old_par)
We want to understand the overall data structure and hope to catch any patterns. We therefore draw the frequency distributions of all numerical variables. From these distributions, we notice that several variables have long tails: targeted productivity, over time, financial incentive, etc.
# par(mfrow = c(3, 4))
# boxplot(data[["actual_productivity"]],main="Productivity Index",xlab="Productivity Index")
# boxplot(data[["targeted_productivity"]],main="Target Productivity",xlab="Productivity Index")
# boxplot(data[["smv"]],main="Standard Minute Value",xlab="Minutes Needed for Task")
# boxplot(data[["over_time"]],main="Over Time",xlab="Minutes of Over Time")
# boxplot(data[["wip"]],main="Work in Progress",xlab="Number of Unfinished Products")
# boxplot(data[["incentive"]],main="Financial Incentive",xlab="Bangladash Taka(BDT)",breaks=200)
# boxplot(data[["idle_time"]],main="Idle Time",xlab="Amount of Time When Production was Interrupted")
# boxplot(data[["idle_men"]],main="Idle Workers",xlab="Number of Idle Workers When Production was Interrupted")
# boxplot(data[["no_of_style_change"]],main="Number of Style Change",xlab="Number of Style Change")
# boxplot(data[["no_of_workers"]],main="Number of Workers in a Team",xlab="Number of Workers in a Team")
# Box plots of every numeric variable, drawn on one 3 x 4 grid of panels.
# Names are columns of `data`; values are the x-axis labels.
boxplot_labels <- c(
  actual_productivity = "Actual Productivity",
  targeted_productivity = "Targeted Productivity",
  smv = "Minutes Needed for Task",
  over_time = "Minutes of Over Time",
  wip = "No. of Unfinished Products",
  incentive = "Financial Incentive",
  idle_time = "Interrupted Time",
  idle_men = "Interrupted Workers",
  no_of_style_change = "No. of Style Change",
  no_of_workers = "No. of Workers/Team"
)

# Reserve an outer top margin for the shared title and remember the previous
# graphics settings so they can be restored afterwards.
old_par <- par(mfrow = c(3, 4), oma = c(0, 0, 3, 0))
for (variable in names(boxplot_labels)) {
  # No `breaks` argument here: it belongs to hist(), not to boxplot().
  boxplot(data[[variable]], main = "", xlab = boxplot_labels[[variable]])
}
# outer = TRUE writes the title into the outer margin of the whole grid rather
# than at a fixed line offset from the most recently drawn panel.
mtext("Boxplot Subplots", side = 3, outer = TRUE, line = 1, cex = 1.5)
par(old_par)
We further investigate the distributions via box plots. From the box plots, we see that most of the variables have outliers. This confirms what we see in the histograms - heavy tails for most variables.
To make sure we fit the right model, and knowing that time series variables are generally correlated, we compute the correlation matrix. We define high correlation to be $|\mathrm{Cor}(X,Y)| > 0.7$ for $X, Y \in \{\text{numerical variables}\}$.
# Pairwise correlations of the numeric columns (positions 6 to 15 of `data`),
# shown first with ggcorrplot's default rendering.
correlation_matrix <- cor(data[, 6:15])
ggcorrplot(correlation_matrix, lab = TRUE)

# The hand-built tile plot uses the same matrix, reshaped to long form:
# one row per variable pair with the coefficient in `Freq`.
test_correlation_matrix <- correlation_matrix
T_C_df <- as.data.frame(as.table(test_correlation_matrix))

# Base heat map: blue for negative, white around zero, red for positive.
p <- ggplot(T_C_df, aes(Var1, Var2, fill = Freq)) +
  geom_tile() +
  scale_fill_gradient2(low = "blue3", mid = "white", high = "red3") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 80, hjust = 1)) +
  labs(
    title = "Correlation Matrix for Numerical Variables",
    fill = "Corr"
  )

# Print each coefficient (two decimals) on its tile and name both axes.
p +
  geom_text(
    aes(label = round(Freq, 2), vjust = 1),
    color = "black",
    size = 3
  ) +
  scale_x_discrete(name = "Variable Names") +
  scale_y_discrete(name = "Variable Names")
## Warning: Removed 18 rows containing missing values (`geom_text()`).
From the correlation matrix, we observe that smv and no_of_workers might be highly correlated (correlation = 0.91). Also, over_time and no_of_workers might be highly correlated (correlation = 0.73). We also observe that wip has a missing correlation with every numerical variable except itself. After reviewing the distribution of wip, we realize that wip only has values when the department is sewing.
Naturally, we break the dataset into two parts based on the department each row belongs to and revisit the collinearity issue.
We first look at the proportion of data by department.
# Share of observations per department, shown as a labelled pie chart.
labels_of_departments <- c("sewing", "finishing")
# Number of rows in data_org carrying each department label.
counts_of_departments <- vapply(
  labels_of_departments,
  function(dept) length(data_org$department[data_org$department == dept]),
  integer(1),
  USE.NAMES = FALSE
)
# Percentage of all rows per department, rounded to two decimals.
department_shares <- round(
  100 * counts_of_departments / sum(counts_of_departments),
  2
)
departments_pie_labels <- paste0(
  labels_of_departments, ", ", department_shares, "%"
)
# counts_of_quarters = c(length(data_prod$quarter[data_prod$quarter=="Quarter1"]), length(data_prod$quarter[data_prod$quarter=="Quarter2"]),length(data_prod$quarter[data_prod$quarter=="Quarter3"]), length(data_prod$quarter[data_prod$quarter=="Quarter4"]),length(data_prod$quarter[data_prod$quarter=="Quarter5"]))
# labels_of_quarters = c("Quarter1", "Quarter2","Quarter3","Quarter4","Quarter5")
# quarters_pie_labels <- paste0(labels_of_quarters, ", ", round(100 * counts_of_quarters/sum(counts_of_quarters), 2), "%")
# counts_of_days = c(length(data_prod$day[data_prod$day=="Monday"]),
# length(data_prod$day[data_prod$day=="Tuesday"]),
# length(data_prod$day[data_prod$day=="Wednesday"]),
# length(data_prod$day[data_prod$day=="Thursday"]),
# length(data_prod$day[data_prod$day=="Friday"]),
# length(data_prod$day[data_prod$day=="Saturday"]),
# length(data_prod$day[data_prod$day=="Sunday"]))
# labels_of_days = c("Monday","Tuesday","Wednesday","Thursday","Friday","Saturday","Sunday")
# days_pie_labels <- paste0(labels_of_days, ", ", round(100 * counts_of_days/sum(counts_of_days), 2), "%")
pie(
  counts_of_departments,
  labels = departments_pie_labels,
  main = "Pie Chart of Departments",
  col = rainbow(length(counts_of_departments))
)
#pie(counts_of_quarters, labels = quarters_pie_labels,main = "Pie Chart of Quarters",col = rainbow(length(counts_of_quarters)))
#pie(counts_of_days, labels = days_pie_labels,main = "Pie Chart of Days",col = rainbow(length(counts_of_days)))
We see that sewing department has more corresponding rows than finishing.
# ggplot(gather(data[,-c(1:5)]), aes(key,value)) +
# geom_boxplot() +
# facet_wrap(~key, scales = 'free') +
# labs(title = "Boxplots of all numerical variables")+
# theme_minimal()
# Box plots of every numeric variable, split by department.
# Column 5 is skipped; per the head(data_org) listing that is `team`, so the
# team number is not plotted as if it were a measurement.
data_no_team <- data_org[, c(1:4, 6:15)]
# melt() stacks the numeric columns into variable/value pairs (the remaining
# text columns become id variables), giving one facet per variable with its
# own y scale.
ggplot(melt(data_no_team), aes(x = department, y = value)) +
  facet_wrap(~variable, scales = "free") +
  geom_boxplot() +
  labs(title = "Boxplots of All Numerical Variables by Departments") +
  theme_minimal() +
  scale_x_discrete(name = "Departments")
## Using date, quarter, department, day as id variables
## Warning: Removed 506 rows containing non-finite values (`stat_boxplot()`).
If we zoom in on the box plots per department, the sewing department has a higher mean for most of the numerical variables. For most of the numerical variables - targeted_productivity, smv, wip, idle_time, idle_men, no_of_style_change, no_of_workers, and actual_productivity - the sewing department has higher outliers. This once again supports the idea that the distributions differ between departments. Thus, we should analyze the two departments separately.
# Correlations between columns 6 to 14 of the sewing subset, reshaped to long
# form: one row per variable pair with the coefficient in `Freq`.
sewing_correlation_matrix <- cor(sewing[, 6:14])
S_C_df <- as.data.frame(as.table(sewing_correlation_matrix))

# Heat map running from blue (lowest coefficient) to red (highest).
p <- ggplot(S_C_df, aes(Var1, Var2, fill = Freq)) +
  geom_tile() +
  scale_fill_gradient(low = "blue", high = "red") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 80, hjust = 1)) +
  labs(title = "Correlation Plot")

# Print each coefficient, rounded to one decimal, on its tile.
p +
  geom_text(
    aes(label = round(Freq, 1), vjust = 1),
    color = "black",
    size = 3
  )
#ggcorrplot(sewing_correlation_matrix,lab = TRUE)
We conclude that within the sewing department, we do not see high correlation between variables based on the absolute correlation values. Thus, for now we assume that all variables are uncorrelated within the sewing department.
# Correlations between the finishing subset's numeric columns. Position 8 is
# left out of the selection; judging by the head() listing below, that is the
# wip column.
finishing_correlation_matrix <- cor(finishing[,c(6:7,9:14)])
## Warning in cor(finishing[, c(6:7, 9:14)]): the standard deviation is zero
# Heat map of the matrix with each coefficient printed on its tile.
ggcorrplot(finishing_correlation_matrix,lab = TRUE)
# Preview the same column selection that was passed to cor().
head(finishing[,c(6:7,9:14)])
## targeted_productivity smv over_time incentive idle_time idle_men
## 2 0.75 3.94 960 0 0 0
## 7 0.75 3.94 960 0 0 0
## 14 0.65 3.94 960 0 0 0
## 15 0.75 2.90 960 0 0 0
## 16 0.75 3.94 2160 0 0 0
## 17 0.80 2.90 960 0 0 0
## no_of_style_change no_of_workers
## 2 0 8
## 7 0 8
## 14 0 8
## 15 0 8
## 16 0 18
## 17 0 8
# For each column, print the elements of `finishing` selected by that column's
# "not equal to zero" mask. character(0) means the mask selected nothing.
for (zero_candidate in c("idle_time", "idle_men", "no_of_style_change")) {
  print(finishing[finishing[zero_candidate] != 0])
}
## character(0)
## character(0)
## character(0)
Notice that many of the correlation values involving idle_time, idle_men, and no_of_style_change are missing here. That’s because these variables are 0 for all observations. Thus, the correlation is \(\frac{0}{0}\).
Then we wonder if teams in one department might collaborate. If their productivity is correlated, then it’s possible that some teams help the others.
# Correlation of daily productivity between teams, per department.
finishing_file_path <- "/Users/karawei/Documents/GitHub/OK/combinedProductivityDataforFinishingTeams.csv"
sewing_file_path <- "/Users/karawei/Documents/GitHub/OK/combinedDataforProductivity.csv"
#finishing_file_path <-"C:/Users/ThinkPad/Documents/GitHub/OK/combinedProductivityDataforFinishingTeams.csv"
#sewing_file_path <-"C:/Users/ThinkPad/Documents/GitHub/OK/combinedDataforProductivity.csv"

# Read the CSV files into data frames
finishing_data <- read.csv(finishing_file_path)
sewing_data <- read.csv(sewing_file_path)

# use = "complete.obs" keeps only the rows without any missing value among the
# selected columns (2-12 for finishing, 2-13 for sewing).
finishing_correlation_matrix <- cor(finishing_data[2:12], use = "complete.obs")
sewing_correlation_matrix <- cor(sewing_data[2:13], use = "complete.obs")

# `lab` is a logical switch, so pass TRUE rather than the string "true".
ggcorrplot(finishing_correlation_matrix, lab = TRUE)
ggcorrplot(sewing_correlation_matrix, lab = TRUE)

# Long form for ggplot: one row per team pair, coefficient in `Freq`.
F_C_df <- as.data.frame(as.table(finishing_correlation_matrix))
S_C_df <- as.data.frame(as.table(sewing_correlation_matrix))

# Build the blue-to-red tile heat map for a long-form correlation table.
#   corr_df: data frame with columns Var1, Var2 and Freq
#   title:   plot title
# Returns the ggplot object without tile labels.
plot_correlation_tiles <- function(corr_df, title) {
  ggplot(corr_df, aes(Var1, Var2, fill = Freq)) +
    geom_tile() +
    scale_fill_gradient(low = "blue", high = "red") +
    theme_minimal() +
    theme(axis.text.x = element_text(angle = 80, hjust = 1)) +
    labs(title = title)
}

# Layer that prints each coefficient, rounded to one decimal, on its tile.
correlation_labels <- geom_text(
  aes(label = round(Freq, 1), vjust = 1),
  color = "black",
  size = 3
)

# Create a correlation plot using ggplot2 sewing
p <- plot_correlation_tiles(S_C_df, "Correlation Plot sewing productivity")
p + correlation_labels

# Create a correlation plot using ggplot2 finishing
p <- plot_correlation_tiles(F_C_df, "Correlation Plot finishing productivity")
p + correlation_labels
# Rows of the sewing table with no missing value in columns 2-13, and how
# many of them remain.
sewing_remove <- na.omit(sewing_data[2:13])
nrow(sewing_remove)
## [1] 44
# Same filter applied to every column of the finishing table.
finishing_remove <- na.omit(finishing_data)
nrow(finishing_remove)
## [1] 1
# Show the surviving finishing row(s).
head(finishing_remove)
## date team1productivityFinishing team2productivityFinishing
## 26 01/31/2015 0.9718667 0.9718667
## team3productivityFinishing team4productivityFinishing
## 26 0.9718667 0.9718667
## team5productivityFinishing team6productivityFinishing
## 26 0.9718667 0.9718667
## team7productivityFinishing team8productivityFinishing
## 26 0.9718667 0.9718667
## team9productivityFinishing team10productivityFinishing
## 26 0.9718667 0.9718667
## team11productivityFinishing team12productivityFinishing
## 26 0.9718667 0.9718667
We first combine the productivity of all teams into two datasets - one for the finishing department and the other for the sewing department. Because the number of observations differs between teams (some teams work on more dates than others), we remove all the rows with NA values. We are able to compute the correlation matrix for team 1 - team 11 of the finishing department. We observe that the finishing department has high correlation between almost all teams. We also compute the correlation matrix for team 1 - team 12 of the sewing department. The sewing department has no high correlation between teams. However, there are open questions:
After we remove all missing values, the correlation might not be too reliable
I can’t compute the correlation for team12productivityFinishing, because there are too many missing values. -> In fact, only one row (row 26, on date 01/31/2015) has complete productivity values for all teams.
I am not sure if calculating the productivity would mean anything statistically. Because we have not yet established relationship between productivity and other variables. And teams collaborate during the manufacturing process instead of at the final productivity. -> sol: But it shows productivity between teams might not be independent.
# ggplot(melt(data_org),aes(x=department)) +
# facet_wrap(~variable, scales="free") +
# geom_bar()+
# labs(title = "Bar charts of all numerical variables by departments")+
# theme_minimal()
From both the histograms and boxplots, we realize there are outliers for the sewing department. To deal with such problems, we
tinna
#Incentive
For each team within a department, we want to know whether, as time goes on, changes in productivity can be observed in the other variables, i.e., we investigate the patterns of productivity vs. the other variables.
# Time-series overview for sewing teams 1-9: actual productivity plotted
# against the other variables, each scaled onto the same 0-1 axis.

# Scale a numeric vector by its maximum so it can share the productivity axis.
# A vector that is empty, entirely NA, or whose maximum is 0 is returned
# unchanged, which avoids the NaN values produced by dividing by zero.
normalize_by_max <- function(x) {
  if (length(x) == 0L || all(is.na(x))) {
    return(x)
  }
  peak <- max(x, na.rm = TRUE)
  if (peak == 0) {
    return(x)
  }
  x / peak
}

# Legend label -> line colour, shared by every team plot.
series_colors <- c(
  "Actual Productivity" = "black",
  "Normalized Incentive" = "red",
  "Targeted Productivity" = "blue",
  "Normalized SMV" = "green",
  "Normalized WIP" = "purple",
  "Normalized Overtime" = "magenta",
  "Idle_time" = "cyan",
  "Idle_Worker" = "brown",
  "Normalized No. of style Change" = "pink",
  "Normalized No. of workers" = "orange"
)

# Build the productivity-vs-variables line chart for one team.
#   team_data: one team's rows (needs date, actual_productivity,
#              targeted_productivity and the columns listed in `to_scale`)
#   title:     plot title
# Returns the ggplot object.
#
# NOTE(review): the hand-written plots divided idle_time and idle_men by their
# maximum for some teams and plotted them raw for others, presumably because
# the maximum is 0 for those teams - confirm. Here every team goes through
# normalize_by_max(), which leaves an all-zero column untouched.
plot_team_productivity <- function(team_data, title) {
  to_scale <- c(
    "incentive", "smv", "wip", "over_time", "idle_time", "idle_men",
    "no_of_style_change", "no_of_workers"
  )
  scaled <- team_data
  for (column in to_scale) {
    scaled[[column]] <- normalize_by_max(team_data[[column]])
  }

  # Columns are referenced by bare name inside aes() so that they are looked
  # up in `scaled`, instead of reaching back to a global data frame with `$`.
  ggplot(data = scaled, aes(x = date)) +
    geom_line(aes(y = actual_productivity, color = "Actual Productivity")) +
    geom_line(aes(y = incentive, color = "Normalized Incentive")) +
    geom_line(aes(y = targeted_productivity, color = "Targeted Productivity")) +
    geom_line(aes(y = smv, color = "Normalized SMV")) +
    geom_line(aes(y = wip, color = "Normalized WIP")) +
    geom_line(aes(y = over_time, color = "Normalized Overtime")) +
    geom_line(aes(y = idle_time, color = "Idle_time")) +
    geom_line(aes(y = idle_men, color = "Idle_Worker")) +
    geom_line(aes(y = no_of_style_change, color = "Normalized No. of style Change")) +
    geom_line(aes(y = no_of_workers, color = "Normalized No. of workers")) +
    labs(x = "Date", y = "Scale as Productivity", title = title) +
    scale_color_manual(values = series_colors) +
    theme_minimal() +
    theme(legend.position = "top")
}

sewing_teams <- list(
  sewingTeam01, sewingTeam02, sewingTeam03,
  sewingTeam04, sewingTeam05, sewingTeam06,
  sewingTeam07, sewingTeam08, sewingTeam09
)

# Plots created inside a loop are not auto-printed, hence the explicit print().
for (team_number in seq_along(sewing_teams)) {
  print(
    plot_team_productivity(
      sewing_teams[[team_number]],
      sprintf("SewingTeam%d Productivity vs Other Variables", team_number)
    )
  )
}
# Sewing team 10: actual productivity against the other variables on one scale.
# Every series except actual and targeted productivity is divided by its own
# maximum so that all lines share the productivity axis.
# Columns are referenced by bare name inside aes(); ggplot2 looks them up in
# `data`, and it warns when `df$col` is used instead.
ggplot(data = sewingTeam10, aes(x = date)) +
  geom_line(aes(y = actual_productivity, color = "Actual Productivity")) +
  geom_line(aes(y = incentive / max(incentive), color = "Normalized Incentive")) +
  geom_line(aes(y = targeted_productivity, color = "Targeted Productivity")) +
  geom_line(aes(y = smv / max(smv), color = "Normalized SMV")) +
  geom_line(aes(y = wip / max(wip), color = "Normalized WIP")) +
  geom_line(aes(y = over_time / max(over_time), color = "Normalized Overtime")) +
  geom_line(aes(y = idle_time / max(idle_time), color = "Idle_time")) +
  geom_line(aes(y = idle_men / max(idle_men), color = "Idle_Worker")) +
  geom_line(aes(y = no_of_style_change / max(no_of_style_change), color = "Normalized No. of style Change")) +
  geom_line(aes(y = no_of_workers / max(no_of_workers), color = "Normalized No. of workers")) +
  labs(x = "Date", y = "Scale as Productivity", title = "SewingTeam10 Productivity vs Other Variables") +
  scale_color_manual(values = c(
    "Actual Productivity" = "black",
    "Normalized Incentive" = "red",
    "Targeted Productivity" = "blue",
    "Normalized SMV" = "green",
    "Normalized WIP" = "purple",
    "Normalized Overtime" = "magenta",
    "Idle_time" = "cyan",
    "Idle_Worker" = "brown",
    "Normalized No. of style Change" = "pink",
    "Normalized No. of workers" = "orange"
  )) +
  theme_minimal() +
  theme(legend.position = "top")
# Sewing team 11: actual productivity against the other variables on one scale.
# Every series except actual and targeted productivity is divided by its own
# maximum so that all lines share the productivity axis.
# Columns are referenced by bare name inside aes(); ggplot2 looks them up in
# `data`, and it warns when `df$col` is used instead.
ggplot(data = sewingTeam11, aes(x = date)) +
  geom_line(aes(y = actual_productivity, color = "Actual Productivity")) +
  geom_line(aes(y = incentive / max(incentive), color = "Normalized Incentive")) +
  geom_line(aes(y = targeted_productivity, color = "Targeted Productivity")) +
  geom_line(aes(y = smv / max(smv), color = "Normalized SMV")) +
  geom_line(aes(y = wip / max(wip), color = "Normalized WIP")) +
  geom_line(aes(y = over_time / max(over_time), color = "Normalized Overtime")) +
  geom_line(aes(y = idle_time / max(idle_time), color = "Idle_time")) +
  geom_line(aes(y = idle_men / max(idle_men), color = "Idle_Worker")) +
  geom_line(aes(y = no_of_style_change / max(no_of_style_change), color = "Normalized No. of style Change")) +
  geom_line(aes(y = no_of_workers / max(no_of_workers), color = "Normalized No. of workers")) +
  labs(x = "Date", y = "Scale as Productivity", title = "SewingTeam11 Productivity vs Other Variables") +
  scale_color_manual(values = c(
    "Actual Productivity" = "black",
    "Normalized Incentive" = "red",
    "Targeted Productivity" = "blue",
    "Normalized SMV" = "green",
    "Normalized WIP" = "purple",
    "Normalized Overtime" = "magenta",
    "Idle_time" = "cyan",
    "Idle_Worker" = "brown",
    "Normalized No. of style Change" = "pink",
    "Normalized No. of workers" = "orange"
  )) +
  theme_minimal() +
  theme(legend.position = "top")
# Sewing team 12: actual productivity against the other variables on one scale.
# Series labelled "Normalized" are divided by their own maximum. idle_time and
# idle_men are plotted raw: the printed checks further down show that their
# maximum for this team is 0, so dividing by it would give 0/0.
# Columns are referenced by bare name inside aes(); ggplot2 looks them up in
# `data`, and it warns when `df$col` is used instead.
ggplot(data = sewingTeam12, aes(x = date)) +
  geom_line(aes(y = actual_productivity, color = "Actual Productivity")) +
  geom_line(aes(y = incentive / max(incentive), color = "Normalized Incentive")) +
  geom_line(aes(y = targeted_productivity, color = "Targeted Productivity")) +
  geom_line(aes(y = smv / max(smv), color = "Normalized SMV")) +
  geom_line(aes(y = wip / max(wip), color = "Normalized WIP")) +
  geom_line(aes(y = over_time / max(over_time), color = "Normalized Overtime")) +
  geom_line(aes(y = idle_time, color = "Idle_time")) +
  geom_line(aes(y = idle_men, color = "Idle_Worker")) +
  # NOTE(review): this series is plotted raw although its legend label says
  # "Normalized" -- presumably because max(no_of_style_change) is 0 for this
  # team; confirm before dividing by the maximum.
  geom_line(aes(y = no_of_style_change, color = "Normalized No. of style Change")) +
  geom_line(aes(y = no_of_workers / max(no_of_workers), color = "Normalized No. of workers")) +
  labs(x = "Date", y = "Scale as Productivity", title = "SewingTeam12 Productivity vs Other Variables") +
  scale_color_manual(values = c(
    "Actual Productivity" = "black",
    "Normalized Incentive" = "red",
    "Targeted Productivity" = "blue",
    "Normalized SMV" = "green",
    "Normalized WIP" = "purple",
    "Normalized Overtime" = "magenta",
    "Idle_time" = "cyan",
    "Idle_Worker" = "brown",
    "Normalized No. of style Change" = "pink",
    "Normalized No. of workers" = "orange"
  )) +
  theme_minimal() +
  theme(legend.position = "top")
# For each sewing team in turn, print the idle_time entries that equal the
# team's maximum idle_time. A printed run of zeros means the maximum is 0.
sewingTeam01$idle_time[sewingTeam01$idle_time == max(sewingTeam01$idle_time)]
## [1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [39] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
sewingTeam02$idle_time[sewingTeam02$idle_time == max(sewingTeam02$idle_time)]
## [1] 6.5
sewingTeam03$idle_time[sewingTeam03$idle_time == max(sewingTeam03$idle_time)]
## [1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [39] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
sewingTeam04$idle_time[sewingTeam04$idle_time == max(sewingTeam04$idle_time)]
## [1] 150
sewingTeam05$idle_time[sewingTeam05$idle_time == max(sewingTeam05$idle_time)]
## [1] 90
sewingTeam06$idle_time[sewingTeam06$idle_time == max(sewingTeam06$idle_time)]
## [1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [39] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
sewingTeam07$idle_time[sewingTeam07$idle_time == max(sewingTeam07$idle_time)]
## [1] 270
sewingTeam08$idle_time[sewingTeam08$idle_time == max(sewingTeam08$idle_time)]
## [1] 300
sewingTeam09$idle_time[sewingTeam09$idle_time == max(sewingTeam09$idle_time)]
## [1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [39] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
sewingTeam10$idle_time[sewingTeam10$idle_time == max(sewingTeam10$idle_time)]
## [1] 8 8
sewingTeam11$idle_time[sewingTeam11$idle_time == max(sewingTeam11$idle_time)]
## [1] 4
sewingTeam12$idle_time[sewingTeam12$idle_time == max(sewingTeam12$idle_time)]
## [1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [39] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
# Same check for idle_men: for each sewing team in turn, print the entries that
# equal the team's maximum idle_men. A printed run of zeros means the maximum is 0.
sewingTeam01$idle_men[sewingTeam01$idle_men == max(sewingTeam01$idle_men)]
## [1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [39] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
sewingTeam02$idle_men[sewingTeam02$idle_men == max(sewingTeam02$idle_men)]
## [1] 30
sewingTeam03$idle_men[sewingTeam03$idle_men == max(sewingTeam03$idle_men)]
## [1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [39] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
sewingTeam04$idle_men[sewingTeam04$idle_men == max(sewingTeam04$idle_men)]
## [1] 15
sewingTeam05$idle_men[sewingTeam05$idle_men == max(sewingTeam05$idle_men)]
## [1] 25
sewingTeam06$idle_men[sewingTeam06$idle_men == max(sewingTeam06$idle_men)]
## [1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [39] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
sewingTeam07$idle_men[sewingTeam07$idle_men == max(sewingTeam07$idle_men)]
## [1] 45
sewingTeam08$idle_men[sewingTeam08$idle_men == max(sewingTeam08$idle_men)]
## [1] 37
sewingTeam09$idle_men[sewingTeam09$idle_men == max(sewingTeam09$idle_men)]
## [1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [39] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
sewingTeam10$idle_men[sewingTeam10$idle_men == max(sewingTeam10$idle_men)]
## [1] 35 35
sewingTeam11$idle_men[sewingTeam11$idle_men == max(sewingTeam11$idle_men)]
## [1] 20
sewingTeam12$idle_men[sewingTeam12$idle_men == max(sewingTeam12$idle_men)]
## [1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [39] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
# ggplot(data = sewingTeam01, aes(x = sewingTeam01$date)) +
# geom_line(aes(y= sewingTeam01$actual_productivity,color="Actual Productivity"))+
# geom_line(aes(y = sewingTeam01$incentive/max(sewingTeam01$incentive), color = "Normalized Incentive"))+
# labs(x="Date", y = NA, title = "SewingTeam1 Productivity vs Targeted Normalized Incentive")+
# scale_color_manual(values = c("Actual Productivity" = "black","Normalized Incentive" = "red")) +
# theme_minimal() +
# theme(legend.position = "top")
# Draw sewing team 1's actual productivity together with one other series.
#   series        numeric vector with one value per row of sewingTeam01
#   series_label  legend label for that series
#   series_colour line colour for that series
#   plot_title    plot title
#   y_label       value passed to labs(y = ...)
# Returns the ggplot object; calling the helper at top level prints the plot.
# Columns of sewingTeam01 are referenced by bare name inside aes(), because
# ggplot2 warns when `df$col` is used there.
plot_team01_against <- function(series, series_label, series_colour, plot_title, y_label) {
  ggplot(data = sewingTeam01, aes(x = date)) +
    geom_line(aes(y = actual_productivity, color = "Actual Productivity")) +
    geom_line(aes(y = series, color = series_label)) +
    labs(x = "Date", y = y_label, title = plot_title) +
    scale_color_manual(values = c(
      "Actual Productivity" = "black",
      setNames(series_colour, series_label)
    )) +
    theme_minimal() +
    theme(legend.position = "top")
}

#Target Productivity ######################################################################################
plot_team01_against(
  sewingTeam01$targeted_productivity, "Targeted Productivity", "blue",
  "SewingTeam1 Productivity vs Targeted Productivity", "Productivity"
)
#SMV ######################################################################################
plot_team01_against(
  sewingTeam01$smv / max(sewingTeam01$smv), "Normalized SMV", "green",
  "SewingTeam1 Productivity vs Normalized SMV", NA
)
#WIP #####################################################################################
plot_team01_against(
  sewingTeam01$wip / max(sewingTeam01$wip), "Normalized WIP", "purple",
  "SewingTeam1 Productivity vs Normalized WIP", NA
)
#overtime ###########################################################################################################
plot_team01_against(
  sewingTeam01$over_time / max(sewingTeam01$over_time), "Normalized Overtime", "magenta",
  "SewingTeam1 Productivity vs Normalized Overtime", NA
)
#Idle_time ############################################################################################################
# idle_time is plotted raw (not divided by its maximum).
plot_team01_against(
  sewingTeam01$idle_time, "Idle_time", "cyan",
  "SewingTeam1 Productivity vs Idle_time", NA
)
#Idle_men ############################################################################################################
# idle_men is plotted raw (not divided by its maximum).
plot_team01_against(
  sewingTeam01$idle_men, "Idle_Worker", "brown",
  "SewingTeam1 Productivity vs Idle_Worker", "Productivity"
)
#No. of style Change ############################################################################################################
plot_team01_against(
  sewingTeam01$no_of_style_change / max(sewingTeam01$no_of_style_change),
  "Normalized No. of style Change", "pink",
  "SewingTeam1 Productivity vs Normalized No. of style Change", "Productivity"
)
#Normalized No. of workers
plot_team01_against(
  sewingTeam01$no_of_workers / max(sewingTeam01$no_of_workers),
  "Normalized No. of workers", "orange",
  "SewingTeam1 Productivity vs Normalized No. of workers", NA
)
# Sewing team 1: actual productivity against incentive divided by its maximum.
# The legend entry is named after the series actually drawn (incentive), which
# also matches the plot title.
ggplot(data = sewingTeam01, aes(x = date)) +
  geom_line(aes(y = actual_productivity, color = "Actual Productivity")) +
  geom_line(aes(y = incentive / max(incentive), color = "Normalized Incentive")) +
  labs(x = "Date", y = "Productivity", title = "SewingTeam1 Productivity vs Normalized Incentive") +
  scale_color_manual(values = c("Actual Productivity" = "black", "Normalized Incentive" = "red")) +
  theme_minimal() +
  theme(legend.position = "top") +
  # Drop the legend title.
  guides(color = guide_legend(title = NULL))
# Per-team summary statistics for the twelve sewing teams.
# Each statistic is computed once per team from that team's own data frame, so
# a value can no longer be taken from the wrong team by a copy-paste slip
# (meanSMV06 used to be computed from sewingTeam05).
team_ids <- sprintf("%02d", 1:12)
sewing_teams <- mget(paste0("sewingTeam", team_ids), envir = environment())

# Apply `stat` to one column of every team; returns one unnamed value per team,
# in team order 01..12.
team_stat <- function(column, stat = mean) {
  unname(vapply(sewing_teams, function(team) stat(team[[column]]), numeric(1)))
}

meanOfSewing <- team_stat("actual_productivity")
varOfSewing <- team_stat("actual_productivity", var)
meanofWorkers <- team_stat("no_of_workers")
meanTP <- team_stat("targeted_productivity")
meanSMV <- team_stat("smv")
meanWIP <- team_stat("wip")
meanInsentive <- team_stat("incentive")
meanOvertime <- team_stat("over_time")

# Keep the per-team scalars available under their original names
# (mean01, var01, meanWorker01, meanTP01, meanSMV01, meanWIP01,
# meanInsentive01, meanOvertime01, ... up to team 12).
per_team_stats <- list(
  mean = meanOfSewing,
  var = varOfSewing,
  meanWorker = meanofWorkers,
  meanTP = meanTP,
  meanSMV = meanSMV,
  meanWIP = meanWIP,
  meanInsentive = meanInsentive,
  meanOvertime = meanOvertime
)
for (stat_name in names(per_team_stats)) {
  for (idx in seq_along(team_ids)) {
    assign(
      paste0(stat_name, team_ids[idx]),
      per_team_stats[[stat_name]][idx],
      envir = environment()
    )
  }
}
# Investigate the relationship between the team-level mean of each variable and
# the team-level mean productivity (one point per sewing team).
# Index plot and histogram of the twelve team means of actual productivity.
plot(meanOfSewing)
hist(meanOfSewing)
# Scatter plots: team mean of one variable (x) against team mean productivity (y).
plot(meanofWorkers,meanOfSewing,main="Mean productivity vs Mean Number of workers")
plot(meanTP,meanOfSewing,main="Mean productivity vs Mean Target Productivity")
plot(meanSMV,meanOfSewing,main="Mean productivity vs Mean SMV")
plot(meanWIP,meanOfSewing,main="Mean productivity vs Mean WIP")
plot(meanInsentive,meanOfSewing,main="Mean productivity vs Mean Incentive")
plot(meanOvertime,meanOfSewing,main="Mean productivity vs Mean Overtime")
# Explanation behind these plots:
# The goal of these plots is to see why some teams perform better than others.
# Is it because they have more people on their team? Is it because they are
# getting paid more?
# So we took the mean of some of the variables by team and plotted them against
# the mean of each team's productivity.
# This showed us which variables, on average, affected the productivity of the
# sewing teams and which variables did not.
# Box plot of actual productivity for each of the twelve sewing teams, drawn on
# a 3 x 4 grid (one panel per team).
old_par <- par(mfrow = c(3, 4))
for (team_id in sprintf("%02d", 1:12)) {
  team_data <- get(paste0("sewingTeam", team_id))
  boxplot(
    team_data$actual_productivity,
    main = "Productivity Index",
    xlab = paste0("Sewing Team", team_id)
  )
}
# Restore the previous layout so that later base-graphics plots are not drawn
# into this 3 x 4 grid.
par(old_par)
# One-way ANOVA of actual productivity by department, fitted on data_org
# (the copy in which the department labels were cleaned but kept as text).
fit.department <- aov(actual_productivity ~ department, data=data_org)
# Print the ANOVA table.
summary(fit.department)
## Df Sum Sq Mean Sq F value Pr(>F)
## department 1 0.28 0.27958 9.246 0.00241 **
## Residuals 1195 36.13 0.03024
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
autoplot(fit.department)
The diagnostic plots show that the normality assumption is not met, so we cannot use ANOVA here. The alternative test we consider is the Kruskal-Wallis H test.
# Overlay the histograms of actual productivity for the two departments.
# The histograms are computed without plotting, then drawn on the same axes
# with semi-transparent colours (blue = sewing, red = finishing).
p1 <- hist(sewing$actual_productivity, plot = FALSE)
p2 <- hist(finishing$actual_productivity, plot = FALSE)
plot(
  p1,
  xlab = "Actual Productivity",
  main = "Histograms of Actual Productivity by Departments",
  col = rgb(0, 0, 1, 1 / 4),
  xlim = c(0, 1.5)
)
# add = TRUE draws onto the existing plot instead of starting a new one.
plot(p2, col = rgb(1, 0, 0, 1 / 4), xlim = c(0, 1.5), add = TRUE)
To satisfy the assumption of the Kruskal-Wallis test, the distributions of actual productivity in the two departments (sewing and finishing) must have the same shape. The overlaid histograms above show that they do.
kruskal.test(actual_productivity ~ department, data = data_org)
##
## Kruskal-Wallis rank sum test
##
## data: actual_productivity by department
## Kruskal-Wallis chi-squared = 27.288, df = 1, p-value = 1.753e-07
# Store team as text so that it is treated as a grouping label, not a number.
sewing$team <- as.character(sewing$team)
# Kruskal-Wallis rank sum test of actual productivity across the sewing teams.
kruskal.test(actual_productivity ~ team, data = sewing)
##
## Kruskal-Wallis rank sum test
##
## data: actual_productivity by team
## Kruskal-Wallis chi-squared = 101.03, df = 11, p-value < 2.2e-16
# Dunn's pairwise comparisons between sewing teams, with Bonferroni-adjusted
# p-values. team is stored as text, so dunnTest coerces it to a factor (hence
# the warning) and the comparisons are listed in text order (1, 10, 11, 12, 2, ...).
test_result <- dunnTest(actual_productivity ~ team,
data = sewing,
method = "bonferroni"
)
## Warning: team was coerced to a factor.
test_result
## Dunn (1964) Kruskal-Wallis multiple comparison
## p-values adjusted with the Bonferroni method.
## Comparison Z P.unadj P.adj
## 1 1 - 10 4.38581434 1.155527e-05 7.626475e-04
## 2 1 - 11 6.25321157 4.020968e-10 2.653839e-08
## 3 10 - 11 1.83837959 6.600649e-02 1.000000e+00
## 4 1 - 12 1.60840826 1.077458e-01 1.000000e+00
## 5 10 - 12 -2.82746928 4.691752e-03 3.096556e-01
## 6 11 - 12 -4.70659942 2.518832e-06 1.662429e-04
## 7 1 - 2 2.74808085 5.994522e-03 3.956385e-01
## 8 10 - 2 -1.64502855 9.996392e-02 1.000000e+00
## 9 11 - 2 -3.49752882 4.695900e-04 3.099294e-02
## 10 12 - 2 1.16832005 2.426777e-01 1.000000e+00
## 11 1 - 3 2.34301290 1.912872e-02 1.000000e+00
## 12 10 - 3 -2.07092252 3.836604e-02 1.000000e+00
## 13 11 - 3 -3.93534926 8.307579e-05 5.483002e-03
## 14 12 - 3 0.75109332 4.525965e-01 1.000000e+00
## 15 2 - 3 -0.41875715 6.753936e-01 1.000000e+00
## 16 1 - 4 3.45599819 5.482590e-04 3.618509e-02
## 17 10 - 4 -0.97150793 3.312954e-01 1.000000e+00
## 18 11 - 4 -2.83442850 4.590773e-03 3.029910e-01
## 19 12 - 4 1.87217092 6.118296e-02 1.000000e+00
## 20 2 - 4 0.68764129 4.916787e-01 1.000000e+00
## 21 3 - 4 1.11305970 2.656828e-01 1.000000e+00
## 22 1 - 5 7.44883491 9.416812e-14 6.215096e-12
## 23 10 - 5 3.05784002 2.229386e-03 1.471394e-01
## 24 11 - 5 1.23782560 2.157807e-01 1.000000e+00
## 25 12 - 5 5.92426818 3.136912e-09 2.070362e-07
## 26 2 - 5 4.71000539 2.477102e-06 1.634888e-04
## 27 3 - 5 5.15120799 2.588140e-07 1.708172e-05
## 28 4 - 5 4.06011516 4.904852e-05 3.237202e-03
## 29 1 - 6 4.30731410 1.652489e-05 1.090643e-03
## 30 10 - 6 -0.11633471 9.073873e-01 1.000000e+00
## 31 11 - 6 -1.97178637 4.863400e-02 1.000000e+00
## 32 12 - 6 2.73481305 6.241571e-03 4.119437e-01
## 33 2 - 6 1.54281451 1.228758e-01 1.000000e+00
## 34 3 - 6 1.97200741 4.860876e-02 1.000000e+00
## 35 4 - 6 0.86264213 3.883343e-01 1.000000e+00
## 36 5 - 6 -3.20116745 1.368720e-03 9.033550e-02
## 37 1 - 7 5.79245499 6.936492e-09 4.578085e-07
## 38 10 - 7 1.45195727 1.465135e-01 1.000000e+00
## 39 11 - 7 -0.35741589 7.207805e-01 1.000000e+00
## 40 12 - 7 4.26587430 1.991209e-05 1.314198e-03
## 41 2 - 7 3.08223190 2.054547e-03 1.356001e-01
## 42 3 - 7 3.51033277 4.475462e-04 2.953805e-02
## 43 4 - 7 2.42684177 1.523090e-02 1.000000e+00
## 44 5 - 7 -1.57203638 1.159421e-01 1.000000e+00
## 45 6 - 7 1.57946885 1.142286e-01 1.000000e+00
## 46 1 - 8 5.75008114 8.920063e-09 5.887241e-07
## 47 10 - 8 1.38965360 1.646341e-01 1.000000e+00
## 48 11 - 8 -0.42861222 6.682054e-01 1.000000e+00
## 49 12 - 8 4.21619108 2.484632e-05 1.639857e-03
## 50 2 - 8 3.02738708 2.466779e-03 1.628074e-01
## 51 3 - 8 3.45728825 5.456409e-04 3.601230e-02
## 52 4 - 8 2.36860115 1.785550e-02 1.000000e+00
## 53 5 - 8 -1.64853377 9.924319e-02 1.000000e+00
## 54 6 - 8 1.51728524 1.291947e-01 1.000000e+00
## 55 7 - 8 -0.06833371 9.455200e-01 1.000000e+00
## 56 1 - 9 3.63854406 2.741837e-04 1.809613e-02
## 57 10 - 9 -0.76957052 4.415547e-01 1.000000e+00
## 58 11 - 9 -2.62272825 8.722882e-03 5.757102e-01
## 59 12 - 9 2.06371433 3.904481e-02 1.000000e+00
## 60 2 - 9 0.88259485 3.774552e-01 1.000000e+00
## 61 3 - 9 1.30704722 1.911967e-01 1.000000e+00
## 62 4 - 9 0.19956131 8.418237e-01 1.000000e+00
## 63 5 - 9 -3.84416077 1.209657e-04 7.983735e-03
## 64 6 - 9 -0.65938640 5.096477e-01 1.000000e+00
## 65 7 - 9 -2.22075245 2.636773e-02 1.000000e+00
## 66 8 - 9 -2.16175709 3.063690e-02 1.000000e+00
# library(PMCMRplus)
# result <- PMCMRplus::kwManyOneDunnTest(x = sewing$actual_productivity, g = sewing$team, data = sewing, method = "bonferroni")
# result
# sewing$team <- as.integer(sewing$team)
# library(emmeans)
# fit.sewing <- aov(actual_productivity^3 ~ team, data=sewing)
# autoplot(fit.sewing)
# sewing.mc <- emmeans(fit.sewing, "team",data=sewing)
# contrast(sewing.mc, "trt.vs.ctrl", reference = "1")
library(ggfortify)
# Linear model of cubed actual productivity on the nine numeric predictors,
# fitted on all sewing rows.
sewing.fit <- lm(actual_productivity^3 ~ targeted_productivity+smv+wip+over_time+incentive+idle_time+idle_men+no_of_style_change+no_of_workers, data=sewing)
# Plot the fitted model with autoplot (presumably the ggfortify diagnostic
# plots for lm objects -- confirm).
autoplot(sewing.fit)
library(stats)
#sewing_std <- scale(sewingTeam01[,c(6:11,14)])
# NOTE(review): `threshold` and `outlier_dates_01` are not used in the code
# that follows here (outlier_function() is later called with the literal 27);
# they are kept in case later code reads them.
threshold <- 20
outlier_dates_01 <- as.Date(character(0), format = "%Y-%m-%d")
# PCA on the selected numeric columns of sewing team 1 (centred and scaled).
sewing_pca <- prcomp(sewingTeam01[, c(6:10, 13:14)], center = TRUE, scale. = TRUE)
# Mahalanobis distance of every row from the centre of the PCA scores.
mahalanobis_dist <- mahalanobis(sewing_pca$x, colMeans(sewing_pca$x), cov(sewing_pca$x))
# Store the distances as a column. Assigning by name (instead of cbind) means
# that re-running this code overwrites the column rather than appending a
# second column with the same name.
sewingTeam01$mahalanobis_dist <- unname(mahalanobis_dist)
# Return the dates of the rows whose Mahalanobis distance exceeds a threshold.
#
# threshold: numeric cut-off; rows with mahalanobis_dist > threshold are flagged.
# dataset:   data frame with a `mahalanobis_dist` column and a `date` column
#            (Date, or text in "%Y-%m-%d" format).
#
# Returns a Date vector in row order; it has length 0 when no row exceeds the
# threshold or when `dataset` has no rows. Rows whose distance is NA are not
# flagged.
outlier_function <- function(threshold, dataset) {
  # which() drops NA comparisons and yields integer(0) for an empty data set.
  flagged_rows <- which(dataset$mahalanobis_dist > threshold)
  as.Date(dataset$date[flagged_rows], format = "%Y-%m-%d")
}
# Dates of sewing team 1 rows whose Mahalanobis distance exceeds 27.
sewingTeam01_outlier <- outlier_function(27,sewingTeam01)
# Copy of sewing team 1 without the rows on those dates.
sewingTeam01_outlierrmv <- sewingTeam01[!sewingTeam01$date %in% sewingTeam01_outlier, ]
# team_no_outliers <- function(team){
# outlierrmv <- team[!team$date %in% outlier_function(27,team), ]
# paste0(names(team), "_rmv") <- oulierrmv
# return(paste0(names(team), "_rmv"))
# }
In the code above, we try to reduce the dimensionality of the data set using PCA. We then set a threshold for the Mahalanobis distance and flag as an outlier any observation whose Mahalanobis distance is greater than the threshold. We keep adjusting the threshold value until most values are excluded. However, one concern is that our data, as shown in the hypothesis testing, is not normally distributed. Therefore, we should consider each team separately.
# library(ClassDiscovery)
# spca <- SamplePCA(sewing[6:15])
# spca
library(ggfortify)
testfit <- aov(actual_productivity ~ targeted_productivity+smv+wip+over_time+incentive+idle_time+idle_men+no_of_style_change+no_of_workers, data=sewingTeam01)
autoplot(testfit)
summary(testfit)
## Df Sum Sq Mean Sq F value Pr(>F)
## targeted_productivity 1 0.6844 0.6844 287.546 < 2e-16 ***
## smv 1 0.1896 0.1896 79.657 9.14e-12 ***
## wip 1 0.0140 0.0140 5.881 0.0191 *
## over_time 1 0.0058 0.0058 2.445 0.1245
## incentive 1 0.3163 0.3163 132.900 1.97e-15 ***
## no_of_style_change 1 0.0155 0.0155 6.519 0.0139 *
## no_of_workers 1 0.0142 0.0142 5.978 0.0182 *
## Residuals 48 0.1142 0.0024
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Dates to drop from sewing team 1 before refitting.
# Each date is written as one clean "YYYY-MM-DD" string; a line break inside
# the first string previously made it "\n2015-02-25", which can never match a
# date.
outliers_to_exclude <- c("2015-02-25", "2015-02-24", "2015-01-18")
# Compare text with text so the match does not depend on how R matches a Date
# column against a character vector.
sewingTeam01_outlierrmv <- sewingTeam01[!(as.character(sewingTeam01$date) %in% outliers_to_exclude), ]
# Refit the full model on the data without those dates and plot it.
testfit_rmv <- aov(actual_productivity ~ targeted_productivity+smv+wip+over_time+incentive+idle_time+idle_men+no_of_style_change+no_of_workers, data=sewingTeam01_outlierrmv)
autoplot(testfit_rmv)
# Split sewing team 1 into training and test rows (80% of rows, drawn at random
# after seeding the RNG) and fit the full linear model on the training rows.
set.seed(123)
n_team01_rows <- nrow(sewingTeam01)
sample_indices <- sample(n_team01_rows, size = 0.8 * n_team01_rows)
# Rows drawn above form the training set; all remaining rows form the test set.
test.data <- sewingTeam01[-sample_indices, ]
train.data <- sewingTeam01[sample_indices, ]
ln.fit <- lm(
  actual_productivity ~ targeted_productivity + smv + wip + over_time + incentive + idle_time + idle_men + no_of_style_change + no_of_workers,
  data = train.data
)
summary(ln.fit)
##
## Call:
## lm(formula = actual_productivity ~ targeted_productivity + smv +
## wip + over_time + incentive + idle_time + idle_men + no_of_style_change +
## no_of_workers, data = train.data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.143980 -0.011009 0.001305 0.010829 0.160442
##
## Coefficients: (2 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.486e-01 2.774e-01 1.257 0.2169
## targeted_productivity 1.753e-01 1.979e-01 0.886 0.3816
## smv -4.992e-03 1.866e-03 -2.676 0.0112 *
## wip -7.321e-07 3.563e-06 -0.205 0.8384
## over_time -1.333e-06 3.534e-06 -0.377 0.7082
## incentive 3.555e-03 5.353e-04 6.641 9.75e-08 ***
## idle_time NA NA NA NA
## idle_men NA NA NA NA
## no_of_style_change -9.230e-02 4.648e-02 -1.986 0.0547 .
## no_of_workers 4.052e-03 3.987e-03 1.016 0.3163
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.05248 on 36 degrees of freedom
## Multiple R-squared: 0.9042, Adjusted R-squared: 0.8856
## F-statistic: 48.54 on 7 and 36 DF, p-value: < 2.2e-16
library("leaps")
library(gridExtra)
prod.gsub <- regsubsets(actual_productivity ~ targeted_productivity+smv+wip+over_time+incentive+idle_time+idle_men+no_of_style_change+no_of_workers, data=train.data, nbest=4, nvmax=9)
## Warning in leaps.setup(x, y, wt = wt, nbest = nbest, nvmax = nvmax, force.in =
## force.in, : 2 linear dependencies found
## Reordering variables and trying again:
# Collect adjusted R-squared and BIC for every candidate model returned by
# regsubsets, numbered in the order in which the summary lists them.
stats <- summary(prod.gsub)
gsub.df <- data.frame(
  Model.Number = seq_along(stats$adjr2),
  Adjusted.R2 = stats$adjr2,
  BIC = stats$bic
)
# Upper panel: adjusted R-squared by model number.
# (p1 and p2 replace the histogram objects stored under these names earlier.)
p1 <- ggplot(gsub.df, aes(x = Model.Number, y = Adjusted.R2)) +
  geom_line() +
  geom_point(color = "red", size = 2) +
  theme_minimal() +
  labs(x = "Model Number", y = "Adjusted R-squared")
# Lower panel: BIC by model number.
p2 <- ggplot(gsub.df, aes(x = Model.Number, y = BIC)) +
  geom_line() +
  geom_point(color = "red", size = 2) +
  theme_minimal() +
  labs(x = "Model Number", y = "BIC")
# Stack the two panels vertically.
grid.arrange(p1, p2, nrow = 2)
coef(prod.gsub, which.max(gsub.df$Adjusted.R2))
## (Intercept) smv incentive idle_time
## 0.517009582 -0.002033477 0.004991662 0.000000000
max(gsub.df$Adjusted.R2)
## [1] 0.8929808
coef(prod.gsub, which.min(gsub.df$BIC))
## (Intercept) smv incentive idle_time
## 0.517009582 -0.002033477 0.004991662 0.000000000
gsub.df$Adjusted.R2[which.min(gsub.df$BIC)]
## [1] 0.8929808
# Refit the selected two-predictor model on the training rows and predict the
# held-out test rows.
fit1 <- lm(actual_productivity ~ smv+incentive, data=train.data)
test.predictions <- predict(fit1, newdata=test.data)
# Calculate observed - predicted actual productivity for the test data
residuals <- test.data$actual_productivity - test.predictions
# Calculate and display the root-mean-square error of the test predictions
# (square root of the mean squared residual; no degrees-of-freedom correction).
test.rse <- sqrt(mean(residuals^2))
test.rse
## [1] 0.05199166
# Set training control method as 5-fold CV
library("caret")
train.control <- trainControl(method = "cv", number = 5)
# Fix the RNG seed before resampling: the folds are assigned at random, so
# without a seed the reported RMSE / R-squared / MAE change on every run.
# 123 is the seed this analysis already uses for its cross-validated lasso.
set.seed(123)
# Train the full nine-predictor linear model on all of sewingTeam01.
# NOTE(review): idle_time and idle_men get NA coefficients in the lm summary
# printed earlier, which probably explains the rank-deficient-fit warnings
# this call emits; consider dropping them from the formula (confirm first).
kfoldCV.model <- train(
  actual_productivity ~ targeted_productivity + smv + wip + over_time +
    incentive + idle_time + idle_men + no_of_style_change + no_of_workers,
  data = sewingTeam01,
  method = "lm",
  trControl = train.control
)
## Warning in predict.lm(modelFit, newdata): prediction from rank-deficient fit;
## attr(*, "non-estim") has doubtful cases
## Warning in predict.lm(modelFit, newdata): prediction from rank-deficient fit;
## attr(*, "non-estim") has doubtful cases
## Warning in predict.lm(modelFit, newdata): prediction from rank-deficient fit;
## attr(*, "non-estim") has doubtful cases
## Warning in predict.lm(modelFit, newdata): prediction from rank-deficient fit;
## attr(*, "non-estim") has doubtful cases
## Warning in predict.lm(modelFit, newdata): prediction from rank-deficient fit;
## attr(*, "non-estim") has doubtful cases
# Show the cross-validated performance summary of the full model.
print(kfoldCV.model)
## Linear Regression
##
## 56 samples
## 9 predictor
##
## No pre-processing
## Resampling: Cross-Validated (5 fold)
## Summary of sample sizes: 44, 44, 45, 46, 45
## Resampling results:
##
## RMSE Rsquared MAE
## 0.1085456 0.6662927 0.05798348
##
## Tuning parameter 'intercept' was held constant at a value of TRUE
# Set training control method as 5-fold CV
library("caret")
train.control <- trainControl(method = "cv", number = 5)
# Fix the RNG seed before resampling: the folds are assigned at random, so
# without a seed the reported RMSE / R-squared / MAE change on every run.
# 123 is the seed this analysis already uses for its cross-validated lasso.
set.seed(123)
# Train the reduced two-predictor model (smv + incentive) on all of
# sewingTeam01. This overwrites the previous kfoldCV.model.
kfoldCV.model <- train(
  actual_productivity ~ smv + incentive,
  data = sewingTeam01,
  method = "lm",
  trControl = train.control
)
# Display results
kfoldCV.model
## Linear Regression
##
## 56 samples
## 2 predictor
##
## No pre-processing
## Resampling: Cross-Validated (5 fold)
## Summary of sample sizes: 45, 45, 45, 44, 45
## Resampling results:
##
## RMSE Rsquared MAE
## 0.06740587 0.8586276 0.04656862
##
## Tuning parameter 'intercept' was held constant at a value of TRUE
# Coerce the training split to a data.frame; the lasso code below selects its
# predictor columns by position and converts them to a matrix for glmnet.
training_data_frame <- as.data.frame(train.data)
library(glmnet)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following object is masked from 'package:reshape':
##
## expand
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
## Loaded glmnet 4.1-8
# Tune the lasso penalty by cross-validation on the training split.
# Candidate penalties: 100 log-spaced values from 1e10 down to 1e-2.
# `length.out` is spelled out in full; `length =` only worked through partial
# argument matching.
lambda_seq <- 10^seq(10, -2, length.out = 100)
# cv.glmnet() assigns observations to folds at random; seed the RNG so the
# selected lambda is reproducible (123 is the seed already used for the
# caret-based lasso in this analysis).
set.seed(123)
cv_model <- cv.glmnet(
  # Columns 6-14 are expected to be the nine predictors
  # (targeted_productivity ... no_of_workers), matching the coefficient table
  # printed for the fitted model.
  x = as.matrix(training_data_frame[6:14]),
  y = train.data$actual_productivity,
  alpha = 1, # alpha = 1 selects the lasso (L1) penalty
  lambda = lambda_seq
)
# Get the best lambda value (the one with the lowest cross-validated error)
best_lambda <- cv_model$lambda.min
best_lambda
## [1] 0.0231013
# Refit the lasso on the training split at the single penalty chosen by
# cross-validation (best_lambda).
lasso_model <- glmnet(
  x = as.matrix(training_data_frame[6:14]),
  y = train.data$actual_productivity,
  alpha = 1,
  lambda = best_lambda
)
# Coefficients at that penalty; a "." in the printout marks a predictor whose
# coefficient is exactly zero.
lasso_coefficients <- coef(lasso_model)
lasso_coefficients
## 10 x 1 sparse Matrix of class "dgCMatrix"
## s0
## (Intercept) 0.556784302
## targeted_productivity .
## smv .
## wip .
## over_time .
## incentive 0.003765322
## idle_time .
## idle_men .
## no_of_style_change -0.071567818
## no_of_workers .
# Score the fitted lasso on the held-out test split.
test_data_frame <- as.data.frame(test.data)
lr.predictions <- predict(
  lasso_model,
  newx = as.matrix(test_data_frame[, 6:14])
)
# Test-set errors: observed minus predicted actual_productivity.
lr.residuals <- with(test.data, actual_productivity - lr.predictions)
# Root-mean-square test error, printed below.
lr.rse <- sqrt(mean(lr.residuals * lr.residuals))
lr.rse
## [1] 0.04712888
# Test-set RMSE and MAE of the lasso predictions.
rmse <- with(test.data, sqrt(mean((actual_productivity - lr.predictions)^2)))
mae <- with(test.data, mean(abs(actual_productivity - lr.predictions)))
cat("RMSE:", rmse, "\n", sep = " ")
## RMSE: 0.04712888
cat("MAE:", mae, "\n", sep = " ")
## MAE: 0.03483174
library(glmnet)
library(caret)
# 5-fold cross-validated lasso on the full sewingTeam01 data. The penalty is
# held fixed at best_lambda: the tuning grid has a single row, so this call
# estimates out-of-sample error rather than tuning anything.
set.seed(123) # for reproducibility
# Model formula: actual_productivity against all nine candidate predictors.
formula <- as.formula("actual_productivity ~ targeted_productivity + smv + wip + over_time + incentive + idle_time + idle_men + no_of_style_change + no_of_workers")
# Note: this replaces the earlier glmnet object stored in lasso_model with a
# caret train object.
lasso_model <- train(
  formula,
  data = sewingTeam01,
  method = "glmnet",
  trControl = trainControl(method = "cv", number = 5),
  tuneGrid = expand.grid(alpha = 1, lambda = best_lambda)
)
# Show the resampling summary (RMSE, R-squared, MAE).
print(lasso_model)
## glmnet
##
## 56 samples
## 9 predictor
##
## No pre-processing
## Resampling: Cross-Validated (5 fold)
## Summary of sample sizes: 44, 46, 45, 45, 44
## Resampling results:
##
## RMSE Rsquared MAE
## 0.06513258 0.8750305 0.03873783
##
## Tuning parameter 'alpha' was held constant at a value of 1
## Tuning
## parameter 'lambda' was held constant at a value of 0.0231013
# Pull the coefficients out of caret's final glmnet fit, evaluated at the
# penalty caret recorded as best (bestTune).
lasso_coefficients_kfold <- coef(
  lasso_model[["finalModel"]],
  s = lasso_model[["bestTune"]][["lambda"]]
)
# Print the sparse coefficient vector.
print(lasso_coefficients_kfold)
## 10 x 1 sparse Matrix of class "dgCMatrix"
## s1
## (Intercept) 0.461691519
## targeted_productivity 0.127192553
## smv .
## wip .
## over_time .
## incentive 0.003732186
## idle_time .
## idle_men .
## no_of_style_change -0.030433102
## no_of_workers .
# Analysis-of-variance fit of the two-predictor model on all of sewingTeam01.
testfit <- aov(
  actual_productivity ~ smv + incentive,
  data = sewingTeam01
)
# Diagnostic plots for the fit.
autoplot(testfit)
# ANOVA table: one row per term, plus the residuals.
summary(testfit)
## Df Sum Sq Mean Sq F value Pr(>F)
## smv 1 0.1575 0.1575 41.87 3.2e-08 ***
## incentive 1 0.9973 0.9973 265.08 < 2e-16 ***
## Residuals 53 0.1994 0.0038
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1